# Opens each election-study dataset and estimates issue voting with the regression approach, controlling for ideology.
library(foreign)
library(nnet)
library(aod)
library(ggplot2)

# Set working directory to the project directory.
# NOTE: `wd` is not assigned anywhere in this file; it has to exist in the
# calling environment before the script is run.
setwd(wd)

# Load policy implementation dataset (path is relative to `wd`)
dat <- read.csv("data/implementationdata.csv")

# Election years; the loop further down uses this vector to look up the
# election that follows each survey year.
election_year <- c(1956, 1960, 1964, 1968, 1973, 1976, 1979, 1982,
                   1985, 1988, 1991, 1994, 1998, 2002, 2006, 2010, 2014)

# Only keep VU (Swedish national election study).
# `%in%` never returns NA, so rows with a missing `survey` value are dropped
# instead of being turned into all-NA rows as `==` subsetting would do.
dat <- dat[dat$survey %in% "vu", ]

# Set working directory to the directory with the election datasets; every
# relative path after this point resolves against data/SNES/.
setwd("data/SNES/")

# Estimate issue voting for a set of governing parties.
#
# Fits the linear probability model
#   v2 ~ opinion + ideology + v1
# where v1 / v2 are 0/1 indicators for voting for one of the parties in `g`
# in `vote1` / `vote2` respectively.
#
# Arguments:
#   g     character vector of party labels that count as "governing".
#   data  data frame with columns opinion, ideology, vote1 and vote2.
#         Defaults to the object called `df` visible from the calling
#         context, which is how the rest of this script supplies the data.
#
# Returns a list with
#   sig      1 if the opinion coefficient has p < 0.05, 0 otherwise,
#   changes  the opinion coefficient,
#   se       its standard error.
# All three are NA when the model cannot be estimated or when the opinion
# coefficient is not estimable (e.g. opinion is constant in the sample).
changesfungov <- function(g, data = df) {
  # Dichotomous vote variables: 1 = voted for a party in g, 0 = anything else
  # (including missing, because %in% never returns NA).
  data$v2 <- as.numeric(data$vote2 %in% g)
  data$v1 <- as.numeric(data$vote1 %in% g)

  not_estimable <- list(sig = NA, changes = NA_real_, se = NA_real_)

  coefs <- tryCatch(
    summary(lm(v2 ~ opinion + ideology + v1, data = data))$coefficients,
    error = function(e) {
      warning("changesfungov: model could not be estimated: ",
              conditionMessage(e), call. = FALSE)
      NULL
    }
  )
  if (is.null(coefs)) {
    return(not_estimable)
  }

  # summary.lm() leaves aliased coefficients out of the table, so the opinion
  # term is not guaranteed to sit in row 2. Look it up by name instead.
  # (Assumes `opinion` is numeric, so its coefficient is named "opinion".)
  if (!"opinion" %in% rownames(coefs)) {
    return(not_estimable)
  }

  # ifelse() rather than if/else so that an NA/NaN p-value yields NA.
  sig <- ifelse(coefs["opinion", 4] < 0.05, 1, 0)
  se <- coefs["opinion", 2]
  changes <- coefs["opinion", 1]
  list(sig = sig, changes = changes, se = se)
}


# Result columns that the per-election loop fills in; start them all as NA.
for (new_col in c("totalchanges", "iv_changes_gov", "iv_changes_gov_sig",
                  "iv_changes_gov_se", "govsupport", "govsupport_post")) {
  dat[[new_col]] <- NA
}

# Names of the recoded election datasets in the current folder, sorted.
folder_files <- list.files()
datasets <- sort(folder_files[grepl("recoded\\.dta", folder_files)])

# Reorder by position: entries 4-17 first, then entries 1-3 (the 2000s files,
# which sort first), skipping entry 8 (1970, which had no panel).
datasets <- datasets[c(4:7, 9:17, 1:3)]

library(readstata13) # Necessary to open datasets
# ---- Helpers used by the per-election loop ----

# Numeric party codes found in the vote variables, mapped to the party labels
# used in this script. Codes 40 and 88 both collapse into "dk".
snes_party_codes <- c(
  "11" = "socdem", "12" = "left", "13" = "green", "21" = "mod",
  "22" = "lib", "23" = "center", "24" = "chrisdem", "31" = "swedem",
  "32" = "other", "40" = "dk", "88" = "dk"
)

# Convert a vote column to a factor of party labels. Known numeric codes are
# relabelled, levels that already are party labels are kept, and every other
# level becomes NA.
recode_vote <- function(x) {
  f <- as.factor(x)
  lv <- levels(f)
  is_code <- lv %in% names(snes_party_codes)
  lv[is_code] <- snes_party_codes[lv[is_code]]
  lv[!lv %in% snes_party_codes] <- NA
  levels(f) <- lv
  f
}

# Expand the two-digit year from a file name into a four-digit year.
# If exactly one of 19yy / 20yy is a known election year, that one is used
# (so "14" becomes 2014, not 1914). Otherwise fall back to the previous rule:
# greater than 10 means 19yy, else 20yy.
expand_survey_year <- function(yy) {
  yy <- as.numeric(yy)
  candidates <- c(1900, 2000) + yy
  known <- candidates[candidates %in% election_year]
  if (length(known) == 1) {
    return(known)
  }
  if (yy > 10) 1900 + yy else 2000 + yy
}

# Governing parties for a given year, plus a flag saying whether it is a
# coalition (which decides the pre-election support column used below).
# Returns NULL for years not covered by any of the three groups.
governing_bloc <- function(yr) {
  if (yr %in% c(1936:1973, 1982:1988, 1994:2002, 2014)) {
    return(list(parties = "socdem", coalition = FALSE))
  }
  if (yr %in% c(1976, 1979)) {
    return(list(parties = c("center", "mod", "lib"), coalition = TRUE))
  }
  if (yr %in% c(1991, 2006, 2010)) {
    return(list(parties = c("center", "mod", "lib", "chrisdem"),
                coalition = TRUE))
  }
  NULL
}

# Swap the case of the leading v/V in the part after the underscore of a
# name like "vu56_V12" (-> "vu56_v12").
flip_v_case <- function(name) {
  parts <- strsplit(name, "_")[[1]]
  no <- parts[[2]]
  v <- substr(no, 1, 1)
  v <- if (toupper(v) == v) tolower(v) else toupper(v)
  paste0(parts[[1]], "_", v, substr(no, 2, nchar(no)))
}

# ---- Loop through datasets. The first entry is skipped (start in 1960) ----
for (i in seq_along(datasets)[-1]) {
  d <- read.dta13(datasets[i])
  yy <- substr(datasets[i], 1, 2) # Two-digit year from the file name

  # Recode pre (vote1) and post (vote2) vote to party labels and keep only
  # respondents with a valid value on both.
  d$vote1 <- recode_vote(d$vote1)
  d$vote2 <- recode_vote(d$vote2)
  d <- d[!is.na(d$vote1) & !is.na(d$vote2), ]

  # Issue variables: columns whose name starts with v/V followed by a digit.
  # In the implementation dataset they are called vu<yy>_<column name>.
  issue_cols <- grep("^[Vv][0-9]", colnames(d))
  vars <- paste0("vu", yy, "_", colnames(d)[issue_cols])

  # Full four-digit year
  year <- expand_survey_year(yy)

  # Make sure all levels are in both factors
  party_levels <- unique(c(levels(d$vote1), levels(d$vote2)))
  d$vote1 <- factor(d$vote1, levels = party_levels)
  d$vote2 <- factor(d$vote2, levels = party_levels)

  # Total difference in vote shares between the waves (control variable).
  # Logical indexing is kept on purpose: it creates the column at full
  # length if `votechange` does not exist yet.
  dat$votechange[dat$year == year] <-
    sum(abs(prop.table(table(d$vote2)) - prop.table(table(d$vote1))))

  # Change in the share supporting the governing parties (control variable).
  # For coalitions this is the combined share of all coalition parties.
  election_bloc <- governing_bloc(year)
  if (!is.null(election_bloc)) {
    dat$totalchanges[which(dat$year == year)] <-
      mean(d$vote2 %in% election_bloc$parties) -
      mean(d$vote1 %in% election_bloc$parties)
  }

  # Loop through the issue variables of this election dataset.
  # seq_along() skips the loop cleanly when the file has no issue columns.
  for (j in seq_along(issue_cols)) {
    k <- issue_cols[j]

    # If the name is not found, retry with the case of the v flipped.
    if (length(which(dat$org_var == vars[j])) == 0) {
      vars[j] <- flip_v_case(vars[j])
    }

    mask <- dat$org_var == vars[j]
    rows <- which(mask)
    if (length(rows) == 0) {
      next # Variable is not part of the implementation dataset
    }

    # If the coding of the implementation variable was switched, reverse the
    # sign of the opinion variable (1 <-> -1, via the temporary value -2).
    if (dat$switcher[rows] == 1) {
      d[, k][d[, k] == 1] <- -2
      d[, k][d[, k] == -1] <- 1
      d[, k][d[, k] == -2] <- -1
    }

    # Data frame with issue opinion, ideology and vote choice. It must be
    # called `df`: changesfungov() picks it up under that name.
    # The vote factors are passed through unchanged so that party labels
    # stay attached to the right respondents on every R version.
    o <- d[, k]
    df <- data.frame(opinion = c(o), ideology = c(d$ideology),
                     vote1 = d$vote1, vote2 = d$vote2)

    # Only continue if opinions are not all missing and show some variance.
    if (mean(!is.na(o)) > 0 && length(unique(o)) > 1) {
      # Rescale opinions from 0 to 1
      df$opinion[which(df$opinion == 0)] <- 0.5
      df$opinion[which(df$opinion == -1)] <- 0
      # Rescale ideology from 0 to 1
      df$ideology <- (df$ideology - min(df$ideology, na.rm = TRUE)) /
        (max(df$ideology, na.rm = TRUE) - min(df$ideology, na.rm = TRUE))
      # Give NAs the median position
      df$ideology[is.na(df$ideology)] <-
        median(df$ideology[!is.na(df$ideology)], na.rm = TRUE)

      # Governing parties are chosen from the year stored on the
      # implementation row, not from the file name.
      bloc <- governing_bloc(dat$year[rows])
      if (!is.null(bloc)) {
        g <- bloc$parties
        modiv <- tryCatch(
          changesfungov(g),
          error = function(e) {
            warning("Issue-voting model failed for ", vars[j], ": ",
                    conditionMessage(e), call. = FALSE)
            NULL
          }
        )
        # Record NA (instead of aborting the whole run) when the model
        # could not be estimated.
        estimated <- length(modiv) > 0
        dat$iv_changes_gov[rows] <- if (estimated) modiv$changes else NA
        dat$iv_changes_gov_sig[rows] <- if (estimated) modiv$sig else NA
        dat$iv_changes_gov_se[rows] <- if (estimated) modiv$se else NA

        # Pre-election support value taken from the implementation data:
        # ps_fvr_dk for single-party years, pallians_fvr_dk for coalitions.
        dat$govsupport[mask] <- if (bloc$coalition) {
          dat$pallians_fvr_dk[mask]
        } else {
          dat$ps_fvr_dk[mask]
        }
        # Share with opinion == 1 among post-election voters of g
        dat$govsupport_post[mask] <-
          mean(df$opinion[which(df$vote2 %in% g)] == 1, na.rm = TRUE)
      }

      iyear <- election_year[which(election_year == year) + 1] # Next election
      years <- iyear - year # Number of years until the next election
      var <- paste0("year", 0:years, "policy") # Relevant policy variables
      # Code 1 if any of those policy variables equals 1. ifelse() is kept
      # so that an all-missing row yields NA rather than an error.
      dat$policy[mask] <-
        ifelse(mean(dat[mask, var] == 1, na.rm = TRUE) != 0, 1, 0)
      dat$years[mask] <- years # Record number of years between elections
    }
  }
}

# Split the change in governing-party support into gains (negative values
# set to 0) and losses (positive values set to 0).
dat$totalgains <- dat$totalchanges
dat$totalgains[which(dat$totalgains < 0)] <- 0
dat$totallosses <- dat$totalchanges
dat$totallosses[which(dat$totallosses > 0)] <- 0

# Rescale to percentages
dat$gw_fvr_dk <- dat$gw_fvr_dk * 100
dat$gw_fvr <- dat$gw_fvr * 100
dat$pi90_fvr_dk <- dat$pi90_fvr_dk * 100
dat$govsupport <- dat$govsupport * 100
dat$govsupport_post <- dat$govsupport_post * 100
# 0 for the single-party (social democratic) years, 1 otherwise
dat$coalition <- ifelse(dat$year %in% c(1936:1973, 1982:1988, 1994:2002, 2014), 0, 1)

# These issues were removed because they were either missing or weren't asked
# in the pre wave.
# A logical filter is used instead of dat[-which(...), ]: with negative
# indexing, an empty match would drop every row of `dat`.
drop_vars <- c(
  "vu10_V843", "vu02_V270",                      # Asked in the post survey
  "vu10_V571",                                   # No data on policy implementation
  "vu70_V117", "vu70_V93", "vu70_V116",          # Issues from 1970 and
  "vu70_V92", "vu70_V134", "vu10_V874"           # issues not in pre
)
dat <- dat[!(dat$org_var %in% drop_vars), ]

# Gains: copy of the issue-voting estimate with significant negative
# estimates set to 0, then multiplied by 100
dat$iv_changes_gains <- dat$iv_changes_gov
dat$iv_changes_gains[which(dat$iv_changes_gains < 0 & dat$iv_changes_gov_sig == 1)] <- 0
dat$iv_changes_gains <- dat$iv_changes_gains * 100

# Losses: copy of the issue-voting estimate with significant positive
# estimates set to 0, then multiplied by 100
dat$iv_changes_losses <- dat$iv_changes_gov
dat$iv_changes_losses[which(dat$iv_changes_losses > 0 & dat$iv_changes_gov_sig == 1)] <- 0
dat$iv_changes_losses <- dat$iv_changes_losses * 100

# Absolute and signed issue-voting measures, multiplied by 100
dat$iv_changes_gov_abs <- abs(dat$iv_changes_gov) * 100
dat$iv_changes_gov <- dat$iv_changes_gov * 100

# Create dummies for significant gains/losses
dat$iv_changes_gains_sig <- ifelse(dat$iv_changes_gov > 0 & dat$iv_changes_gov_sig == 1, 1, 0)
dat$iv_changes_losses_sig <- ifelse(dat$iv_changes_gov < 0 & dat$iv_changes_gov_sig == 1, 1, 0)
# A significant loss is set to NA on the gains dummy, and vice versa
dat$iv_changes_gains_sig[which(dat$iv_changes_losses_sig == 1)] <- NA
dat$iv_changes_losses_sig[which(dat$iv_changes_gains_sig == 1)] <- NA

# The working directory was changed earlier in the script, so the output
# path is built from `wd` rather than given as a relative path.
save(dat, file = file.path(wd, "data/dataforstage2.Rda"))

